{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e1af2089",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.preprocessing import OrdinalEncoder\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "28c1ca29",
   "metadata": {},
   "outputs": [],
   "source": [
    "!python honorsproject2.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2b9519b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "\n",
    "# Read every CSV under Storing_csv_files_2 and stack them into one frame.\n",
    "# glob returns paths in arbitrary, OS-dependent order; sorting them keeps the\n",
    "# row order (and therefore any seeded split of the rows) reproducible.\n",
    "all_files = sorted(glob.glob(\"Storing_csv_files_2\" + \"/*.csv\"))\n",
    "if not all_files:\n",
    "    # Fail here with a clear message instead of inside pd.concat.\n",
    "    raise FileNotFoundError('No CSV files matched Storing_csv_files_2/*.csv')\n",
    "\n",
    "frame = []\n",
    "for filename in all_files:\n",
    "    # `df` stays bound to the last file read; the next cell inspects it.\n",
    "    df = pd.read_csv(filename)\n",
    "    frame.append(df)\n",
    "\n",
    "# ignore_index=True gives every row a unique 0..N-1 label instead of repeating\n",
    "# each file's own index.\n",
    "original_df = pd.concat(frame, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "39c4be18",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "20"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2b808fa7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>Summary</th>\n",
       "      <th>Issue id</th>\n",
       "      <th>Status</th>\n",
       "      <th>Assignee</th>\n",
       "      <th>Updated</th>\n",
       "      <th>Last Viewed</th>\n",
       "      <th>Original estimate</th>\n",
       "      <th>Time Spent</th>\n",
       "      <th>Custom field (Start date)</th>\n",
       "      <th>...</th>\n",
       "      <th>Random_Assignee</th>\n",
       "      <th>percent_random</th>\n",
       "      <th>percent_sign</th>\n",
       "      <th>percentage</th>\n",
       "      <th>Due date</th>\n",
       "      <th>Label</th>\n",
       "      <th>percent_random_time_spent</th>\n",
       "      <th>percent_sign_time_spent</th>\n",
       "      <th>percentage_time_spent</th>\n",
       "      <th>Random_time_spent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>Problem statement</td>\n",
       "      <td>10018</td>\n",
       "      <td>Done</td>\n",
       "      <td>Alexey</td>\n",
       "      <td>3/7/2022 13:12</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>12</td>\n",
       "      <td>13.226596</td>\n",
       "      <td>2021-11-01</td>\n",
       "      <td>...</td>\n",
       "      <td>Mushu</td>\n",
       "      <td>0.420958</td>\n",
       "      <td>2</td>\n",
       "      <td>0.841916</td>\n",
       "      <td>2022-01-17</td>\n",
       "      <td>0</td>\n",
       "      <td>0.602216</td>\n",
       "      <td>2</td>\n",
       "      <td>1.204433</td>\n",
       "      <td>13.226596</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Project Proposal</td>\n",
       "      <td>10019</td>\n",
       "      <td>Done</td>\n",
       "      <td>Angie</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>24</td>\n",
       "      <td>8.882222</td>\n",
       "      <td>2021-11-28</td>\n",
       "      <td>...</td>\n",
       "      <td>Xiao Qing</td>\n",
       "      <td>1.631317</td>\n",
       "      <td>2</td>\n",
       "      <td>3.262634</td>\n",
       "      <td>2022-02-26</td>\n",
       "      <td>0</td>\n",
       "      <td>0.610278</td>\n",
       "      <td>2</td>\n",
       "      <td>1.220555</td>\n",
       "      <td>8.882222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>Assign project tasks to the team</td>\n",
       "      <td>10020</td>\n",
       "      <td>Done</td>\n",
       "      <td>Manoj</td>\n",
       "      <td>3/7/2022 13:13</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>2</td>\n",
       "      <td>2.664339</td>\n",
       "      <td>2021-11-30</td>\n",
       "      <td>...</td>\n",
       "      <td>Frankie</td>\n",
       "      <td>0.521741</td>\n",
       "      <td>2</td>\n",
       "      <td>1.043482</td>\n",
       "      <td>2022-02-27</td>\n",
       "      <td>0</td>\n",
       "      <td>0.832170</td>\n",
       "      <td>2</td>\n",
       "      <td>1.664339</td>\n",
       "      <td>2.664339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Discuss budgeting</td>\n",
       "      <td>10021</td>\n",
       "      <td>Done</td>\n",
       "      <td>Violet</td>\n",
       "      <td>3/7/2022 13:04</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>4</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2021-12-01</td>\n",
       "      <td>...</td>\n",
       "      <td>Alexey</td>\n",
       "      <td>1.072196</td>\n",
       "      <td>2</td>\n",
       "      <td>2.144393</td>\n",
       "      <td>2022-04-01</td>\n",
       "      <td>0</td>\n",
       "      <td>0.272547</td>\n",
       "      <td>2</td>\n",
       "      <td>0.545095</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>Software Requirements Document</td>\n",
       "      <td>10022</td>\n",
       "      <td>Done</td>\n",
       "      <td>Latifah</td>\n",
       "      <td>3/7/2022 13:04</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>48</td>\n",
       "      <td>14.929181</td>\n",
       "      <td>2021-12-02</td>\n",
       "      <td>...</td>\n",
       "      <td>Angie</td>\n",
       "      <td>0.441640</td>\n",
       "      <td>2</td>\n",
       "      <td>0.883280</td>\n",
       "      <td>2022-02-24</td>\n",
       "      <td>1</td>\n",
       "      <td>0.188975</td>\n",
       "      <td>-2</td>\n",
       "      <td>-0.377951</td>\n",
       "      <td>14.929181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>View Foreign Exchange account</td>\n",
       "      <td>10033</td>\n",
       "      <td>Done</td>\n",
       "      <td>Mulan</td>\n",
       "      <td>3/7/2022 13:07</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>36</td>\n",
       "      <td>-46.946162</td>\n",
       "      <td>2022-03-13</td>\n",
       "      <td>...</td>\n",
       "      <td>Violet</td>\n",
       "      <td>1.026610</td>\n",
       "      <td>2</td>\n",
       "      <td>2.053221</td>\n",
       "      <td>2022-05-25</td>\n",
       "      <td>0</td>\n",
       "      <td>1.456090</td>\n",
       "      <td>2</td>\n",
       "      <td>2.912180</td>\n",
       "      <td>-46.946162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>Exchange currency tab</td>\n",
       "      <td>10034</td>\n",
       "      <td>Done</td>\n",
       "      <td>Mushu</td>\n",
       "      <td>3/7/2022 12:58</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>48</td>\n",
       "      <td>39.858745</td>\n",
       "      <td>2022-04-04</td>\n",
       "      <td>...</td>\n",
       "      <td>Mushu</td>\n",
       "      <td>0.065802</td>\n",
       "      <td>-2</td>\n",
       "      <td>-0.131603</td>\n",
       "      <td>2022-04-19</td>\n",
       "      <td>0</td>\n",
       "      <td>1.492937</td>\n",
       "      <td>2</td>\n",
       "      <td>2.985875</td>\n",
       "      <td>39.858745</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>User testing</td>\n",
       "      <td>10035</td>\n",
       "      <td>Done</td>\n",
       "      <td>Shang</td>\n",
       "      <td>3/7/2022 12:56</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>8</td>\n",
       "      <td>-3.028330</td>\n",
       "      <td>2022-04-15</td>\n",
       "      <td>...</td>\n",
       "      <td>DJ</td>\n",
       "      <td>0.111901</td>\n",
       "      <td>-2</td>\n",
       "      <td>-0.223802</td>\n",
       "      <td>2022-05-12</td>\n",
       "      <td>0</td>\n",
       "      <td>1.014165</td>\n",
       "      <td>2</td>\n",
       "      <td>2.028330</td>\n",
       "      <td>-3.028330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>Vendors testing the software for any bugs</td>\n",
       "      <td>10036</td>\n",
       "      <td>Done</td>\n",
       "      <td>Jisoo</td>\n",
       "      <td>3/7/2022 12:56</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>24</td>\n",
       "      <td>23.172915</td>\n",
       "      <td>2022-04-26</td>\n",
       "      <td>...</td>\n",
       "      <td>Frankie</td>\n",
       "      <td>0.868540</td>\n",
       "      <td>2</td>\n",
       "      <td>1.737081</td>\n",
       "      <td>2022-07-03</td>\n",
       "      <td>0</td>\n",
       "      <td>0.658646</td>\n",
       "      <td>2</td>\n",
       "      <td>1.317292</td>\n",
       "      <td>23.172915</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>Releasing 2.0</td>\n",
       "      <td>10037</td>\n",
       "      <td>Done</td>\n",
       "      <td>Frankie</td>\n",
       "      <td>3/7/2022 13:06</td>\n",
       "      <td>3/7/2022 13:14</td>\n",
       "      <td>9</td>\n",
       "      <td>10.910075</td>\n",
       "      <td>2022-05-24</td>\n",
       "      <td>...</td>\n",
       "      <td>Mushu</td>\n",
       "      <td>1.766403</td>\n",
       "      <td>2</td>\n",
       "      <td>3.532806</td>\n",
       "      <td>2022-09-23</td>\n",
       "      <td>0</td>\n",
       "      <td>0.591007</td>\n",
       "      <td>2</td>\n",
       "      <td>1.182015</td>\n",
       "      <td>10.910075</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>20000 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                                    Summary  Issue id Status  \\\n",
       "0            0                          Problem statement     10018   Done   \n",
       "1            1                           Project Proposal     10019   Done   \n",
       "2            2           Assign project tasks to the team     10020   Done   \n",
       "3            3                          Discuss budgeting     10021   Done   \n",
       "4            4             Software Requirements Document     10022   Done   \n",
       "..         ...                                        ...       ...    ...   \n",
       "15          15              View Foreign Exchange account     10033   Done   \n",
       "16          16                      Exchange currency tab     10034   Done   \n",
       "17          17                               User testing     10035   Done   \n",
       "18          18  Vendors testing the software for any bugs     10036   Done   \n",
       "19          19                              Releasing 2.0     10037   Done   \n",
       "\n",
       "   Assignee         Updated     Last Viewed  Original estimate  Time Spent  \\\n",
       "0    Alexey  3/7/2022 13:12  3/7/2022 13:14                 12   13.226596   \n",
       "1     Angie             NaN             NaN                 24    8.882222   \n",
       "2     Manoj  3/7/2022 13:13  3/7/2022 13:14                  2    2.664339   \n",
       "3    Violet  3/7/2022 13:04  3/7/2022 13:14                  4    0.000000   \n",
       "4   Latifah  3/7/2022 13:04  3/7/2022 13:14                 48   14.929181   \n",
       "..      ...             ...             ...                ...         ...   \n",
       "15    Mulan  3/7/2022 13:07  3/7/2022 13:14                 36  -46.946162   \n",
       "16    Mushu  3/7/2022 12:58  3/7/2022 13:14                 48   39.858745   \n",
       "17    Shang  3/7/2022 12:56  3/7/2022 13:14                  8   -3.028330   \n",
       "18    Jisoo  3/7/2022 12:56  3/7/2022 13:14                 24   23.172915   \n",
       "19  Frankie  3/7/2022 13:06  3/7/2022 13:14                  9   10.910075   \n",
       "\n",
       "   Custom field (Start date)  ...  Random_Assignee percent_random  \\\n",
       "0                 2021-11-01  ...            Mushu       0.420958   \n",
       "1                 2021-11-28  ...        Xiao Qing       1.631317   \n",
       "2                 2021-11-30  ...          Frankie       0.521741   \n",
       "3                 2021-12-01  ...           Alexey       1.072196   \n",
       "4                 2021-12-02  ...            Angie       0.441640   \n",
       "..                       ...  ...              ...            ...   \n",
       "15                2022-03-13  ...           Violet       1.026610   \n",
       "16                2022-04-04  ...            Mushu       0.065802   \n",
       "17                2022-04-15  ...               DJ       0.111901   \n",
       "18                2022-04-26  ...          Frankie       0.868540   \n",
       "19                2022-05-24  ...            Mushu       1.766403   \n",
       "\n",
       "    percent_sign  percentage    Due date  Label  percent_random_time_spent  \\\n",
       "0              2    0.841916  2022-01-17      0                   0.602216   \n",
       "1              2    3.262634  2022-02-26      0                   0.610278   \n",
       "2              2    1.043482  2022-02-27      0                   0.832170   \n",
       "3              2    2.144393  2022-04-01      0                   0.272547   \n",
       "4              2    0.883280  2022-02-24      1                   0.188975   \n",
       "..           ...         ...         ...    ...                        ...   \n",
       "15             2    2.053221  2022-05-25      0                   1.456090   \n",
       "16            -2   -0.131603  2022-04-19      0                   1.492937   \n",
       "17            -2   -0.223802  2022-05-12      0                   1.014165   \n",
       "18             2    1.737081  2022-07-03      0                   0.658646   \n",
       "19             2    3.532806  2022-09-23      0                   0.591007   \n",
       "\n",
       "    percent_sign_time_spent percentage_time_spent  Random_time_spent  \n",
       "0                         2              1.204433          13.226596  \n",
       "1                         2              1.220555           8.882222  \n",
       "2                         2              1.664339           2.664339  \n",
       "3                         2              0.545095           0.000000  \n",
       "4                        -2             -0.377951          14.929181  \n",
       "..                      ...                   ...                ...  \n",
       "15                        2              2.912180         -46.946162  \n",
       "16                        2              2.985875          39.858745  \n",
       "17                        2              2.028330          -3.028330  \n",
       "18                        2              1.317292          23.172915  \n",
       "19                        2              1.182015          10.910075  \n",
       "\n",
       "[20000 rows x 24 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "original_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "8ce5ee6b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   Status Random_Assignee\n",
      "0    Done           Mushu\n",
      "1    Done       Xiao Qing\n",
      "2    Done         Frankie\n",
      "3    Done          Alexey\n",
      "4    Done           Angie\n",
      "..    ...             ...\n",
      "15   Done          Violet\n",
      "16   Done           Mushu\n",
      "17   Done              DJ\n",
      "18   Done         Frankie\n",
      "19   Done           Mushu\n",
      "\n",
      "[20000 rows x 2 columns]\n",
      "[[ 0. 15.]\n",
      " [ 0. 18.]\n",
      " [ 0.  6.]\n",
      " ...\n",
      " [ 0.  4.]\n",
      " [ 0.  6.]\n",
      " [ 0. 15.]]\n"
     ]
    }
   ],
   "source": [
    "# Treat a missing Status as 'Done'. The result is assigned back to the column:\n",
    "# calling fillna(inplace=True) on a column selection is a chained in-place\n",
    "# update, which pandas deprecates and which does not modify original_df under\n",
    "# copy-on-write.\n",
    "original_df['Status'] = original_df['Status'].fillna('Done')\n",
    "\n",
    "# The two categorical columns that need numeric codes.\n",
    "transform_df = original_df[['Status','Random_Assignee']]\n",
    "print(transform_df)\n",
    "\n",
    "# Ordinal-encode both columns. Despite its name, final_df is a 2-D NumPy array\n",
    "# of float codes (one column per input column), not a DataFrame.\n",
    "encoder = OrdinalEncoder()\n",
    "encoder.fit(transform_df)\n",
    "final_df = encoder.transform(transform_df)\n",
    "print(final_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9ca43fc9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# True if at least one row has no diff_dates value. A single .any() already\n",
    "# reduces the boolean column to one scalar.\n",
    "original_df['diff_dates'].isnull().any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b1b1f287",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     -1.226596\n",
       "1     15.117778\n",
       "2     -0.664339\n",
       "3      4.000000\n",
       "4     33.070819\n",
       "        ...    \n",
       "15    82.946162\n",
       "16     8.141255\n",
       "17    11.028330\n",
       "18     0.827085\n",
       "19    -1.910075\n",
       "Name: diff_time_spent, Length: 20000, dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "original_df['diff_time_spent']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "bad6a17d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Numeric columns copied into a NumPy array, in this exact order. Label is the\n",
    "# last column, which is what the later dataset[:, -1] target slice relies on.\n",
    "arr = original_df[\n",
    "    [\n",
    "        'diff_dates',\n",
    "        'percent_random',\n",
    "        'Original estimate',\n",
    "        'diff_time_spent',\n",
    "        'Time Spent',\n",
    "        'Label',\n",
    "    ]\n",
    "].to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f9c5058b",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = np.hstack((final_df,arr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6472e70f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.        , 15.        , 27.        , ..., -1.22659592,\n",
       "        13.22659592,  0.        ],\n",
       "       [ 0.        , 18.        , 17.        , ..., 15.11777844,\n",
       "         8.88222156,  0.        ],\n",
       "       [ 0.        ,  6.        , 29.        , ..., -0.66433925,\n",
       "         2.66433925,  0.        ],\n",
       "       ...,\n",
       "       [ 0.        ,  4.        , 15.        , ..., 11.02832991,\n",
       "        -3.02832991,  0.        ],\n",
       "       [ 0.        ,  6.        , 18.        , ...,  0.82708486,\n",
       "        23.17291514,  0.        ],\n",
       "       [ 0.        , 15.        , 22.        , ..., -1.91007475,\n",
       "        10.91007475,  0.        ]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "16119c0e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(14000, 6)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Column layout of dataset (encoded columns first, then arr):\n",
    "#   0 Status, 1 Random_Assignee, 2 diff_dates, 3 percent_random,\n",
    "#   4 Original estimate, 5 diff_time_spent, 6 Time Spent, 7 Label\n",
    "# Features are columns 0-5 (Time Spent is not included); the target is the\n",
    "# last column, Label. 30% of the rows are held out, with a fixed seed.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    dataset[:, :6],\n",
    "    dataset[:, -1],\n",
    "    test_size=0.3,\n",
    "    random_state=32,\n",
    ")\n",
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "39f2fb18",
   "metadata": {},
   "outputs": [],
   "source": [
    "dict = {'Name':['Train', 'Test'], 'count' : [len(X_train), len(X_test)]}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "7a5e42d0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Name': ['Train', 'Test'], 'count': [14000, 6000]}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "c6aa9fb9",
   "metadata": {},
   "outputs": [],
   "source": [
    " countDF = pd.DataFrame(dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "144dd017",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Train</td>\n",
       "      <td>14000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Test</td>\n",
       "      <td>6000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Name  count\n",
       "0  Train  14000\n",
       "1   Test   6000"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "countDF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "c78ecf87",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([<matplotlib.patches.Wedge at 0x225b65523a0>,\n",
       "  <matplotlib.patches.Wedge at 0x225b5421ac0>],\n",
       " [Text(-0.6465637441936393, 0.8899187180267096, 'Train'),\n",
       "  Text(0.6465637441936392, -0.8899187180267097, 'Test')],\n",
       " [Text(-0.3526711331965305, 0.4854102098327506, '70.0%'),\n",
       "  Text(0.3526711331965304, -0.48541020983275074, '30.0%')])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAYV0lEQVR4nO3deZgU1b3G8e/p7plBZmAQFBS3ckMFFXHFJYjbNddyjVtu3DdEvRqTXE1db2I6JsYy5vGauEaTuMctyVWxREXct6ioLKIgaKEgsk/DDLN1d90/qtEJA8IM03VOdf0+zzOPgtNz3knmnVNVXXWOCoIAIYR5UroDCCHWTMophKGknEIYSsophKGknEIYSsophKGknEIYSsophKGknEIYSsophKGknEIYSsophKGknEIYSsophKGknEIYSsophKGknEIYSsophKGknEIYSsophKGknEIYSsophKGknEIYSsophKGknEIYSsophKGknEIYKqM7gFg7y/EUsDWwE7ANMKj0sSmwcemjFkgT/qLt+M82ILfaRwOwAPgM8IHPfNdeENX3I7pGyUZGZrAcbwtgf2BPwjIOAXYAepV56JWERZ0OvAtMAt7zXXtpmccV6yDl1MByvAywD3AAYSFHAltoDdXZZ4RFfRl4znftmZrzJI6UMyKW420C/DtgA0cC/bQG6rrPgQnAc8BE37WXaM5T8aScZWQ53lbAacBxwL5UzgW4AjAReBj4h+/aOc15KpKUs4dZjlcHnAicCYymcgq5Nq3AeMKiPum7drPmPBXDuHIqpQYQ/lYG2Izwt/Si0p/3DYKg7VteuzdwZhAEl5U3ZWeW440ELiIsZm3U4xtiGfBn4FbftX3NWWLPuHJ2pJTKAo1BEPyuw99lgiDI60v1DcvxUsDxwE8IL+6IUBF4CrjZd+3ndYeJq1iUE9gVWAqMAN4DHgFuAjYCmoFzgiCYoZQaDfxXEARHl167NbBd6Z83BUHwh57IZTleLXAOcDmwfU98zQo2HbgOeMh37YLuMHESp5sQhgCHB0FQUEr1BUYFQZBXSh0O/IbwcHJ1OwOHAH2AGUqp24MgaO9uAMvxqgkPXf+H8EYAsW5DgfuBqy3H+xXwoO/aRc2ZYiFOFyseC4Jg1W/eeuAxpdQ04H+BYWt5jRcEQWsQBIuBhYR313SZ5Xgpy/HOBGYQzthSzK7bEbgPmGo53gm6w8RBnMrZ1OHffwW8GATBrsAxrP0umtYO/16gG0cKluMdBXwA3AtYXX296GQo8A/L8V62HG833WFMFqfD2o7qgXmlfz+7HANYjrclcAvhe5Si540C3rMc71bgat+1l+sOZJo4zZwd/Ra4Tin1OuFN3j3Gcry05XiXAx8hxSy3DPBDYIbleGfoDmMao6/WRs1yvBHAXcBeurMk1ATgHN+1563zMxNAykk4WxJegf058T3UrxTLgLG+az+qO4huiS+n5XgW8CByE4FpHgQuSfJ9u4kup+V4JxLeblavO4tYoy+A7/uu/YbuIDokspyl5ylvBC7VnUWsUxvwn75r36U7SNQSV07L8TYGHgMO051FdMkfgUt91+72HV5xk6hyWo63I+EN2UN0ZxHd8jpwku/aX+kOEoXElNNyvEOBvxEuiiXiay5wpO/a03UHKbe43oTQJaX7Yp9FilkJtgResRxvX91Byq3iy2k53ljgHuT9y0oyAJhoOd7huoOUU0WXs3Qb3u2A0hxF9Lw6wLMc7yTdQcqlYstpOd5VhI+TicpVDTxiOd7puoOUQ0VeELIc7xdAVncOEZk8cLLv2o/rDtKTKq6cluNdQviol0iWVuDoSlqzqKLKaTneyYRLNFbs4br4Vk3AEb5rv6k7SE+omHKW3sccT3geIpKrARjlu/ZU3UE2VEWUs/Qc5ktAX81RhBl8YB/ftRfrDrIhYn/4ZzneQOBJpJjiGxbwd8vxqnQH2RCxLmfp6ZJHCO8aEaKjUcT8
wmCsy0m4ltBo3SGEscaUrt7HUmzPOS3H+z7wkO4cwnh5wgtEsbuCG8tyWo43FHib5G4YJLrmU2AP37VX6A7SFbE7rC2d5N+PFFOsv+2AW3WH6KrYlRP4GbCn7hAids4onQrFRqwOay3H2xt4E3n8S3RPDhjuu/Yc3UHWR2xmTsvxehFuhCPFFN1VT7jaYizEppzANcAuukOI2DssLls/xOKw1nK8XYDJQGR3fLQvmcuiJ6//+s/5hq/od9Dp1O56KIufuJ788gVk+g5ik+Md0r3qOr2++dNJLJ14JxSL1A3/N+pHngzAspfupvnTSVQP3JZNjv4JAI3TXqDYsoK+e8vWLBFZBOzsu/ZS3UG+TVxmzt8TYTEBqgZsyeBzbmbwOTez+Vk3oapq6D1kf5a/9Ri9rOFsMeYuelnDWf7WY51eGxQLLJ1wOwNP/iWDz7+Npukv07b4c4qtTbTO+4jB595CEBRpW+RTbG+ladrz9BlhR/ntJd2mwLW6Q6yL8eW0HO944AidGVrmTKaq3+Zk6geyctY/qd01XPK2dtfDWPnJW50+v23+TDL9Nqeq32aodBW1u4yi+ZO3AEVQyBMEAUG+DZVKs/ztf9Bnr2NRaTmVjtgYy/GM3rDK6HKWLgLdqDtH00ev0HuXUQAUmhrI1PUHIFPXn2JTQ6fPz69YQqbvN5tfp/tsQqFxCama3vTe6QDm33MZmfpBqJpa2ubPpPeOIyP5PsS/SBEekRnL6HICPwG21RkgKLTTPOttanc+aAO/UrjGWP1+JzH4nJvpf+j55F59gH7fOZ0Vk59l0eMuDW88vOGBRVccWNq53EjGltNyvH7AFbpzNH86iepB25OuDZe8Tdf2I98YXkfINy4lVduv02syfQaQX77o6z8XViwmXZptV2lbMDv83I23oGnaC2x6vEP7ojm0L5WtKSP2a8vxjFyd0dhyEu54rH33r6bpL1NbOqQF6L3DfjRNmxj+t2kT6b3Dfp1eU735EPLLvqS94SuCQjtNH73CRqt9XsOrD1B/0GlQzENQDP9SpQjyreX7ZsSajACMXF7TyHJajlcPXK47R7G9hRb/A3rv9M3WnX1HnkSL/z7z7ryAFv99+pbeIsmvWMKCx34BgEql6X/EWBY+ejVf/ukianf+DtWbbvP111g5802qN9uRTJ8BpHrVUTN4Z7788yWgoHrgdtF+kwLgmtIGykYx8n1Oy/GuBn6pO4dIlDN9175fd4iOjJs5TZk1ReL8WHeA1RlXTuACZMMhEb09LMcbrTtER0aV03K8FHCx7hwisS7XHaAjo8oJHI3m9zVFoh1jOZ4xV+RMK+dFugOIREsBl+kOsYoxV2stx7OA2Zj3C0MkyzJgM9+123QHMakI52FWHpFMGwNGPCJkUhn+Q3cAIUpO0x0ADDmstRxvT2CS7hxClLQCg3zXzukMYcrMeYruAEJ0UIMB99uaUs6TdQcQYjU/0B1A+2FtabnLd7SGEKKzPDDAd+3lugKYMHMerzuAEGuQAQ7XGcCEcmr9H0CIb/FdnYNrLafleH2BvXVmEOJbHKlzcN0z58GAcQ+5ClGydWlHOy10l/MwzeMLsS7aTruknEJ8u86LREVEWzlL55vDdI0vxHraV9fAOmfOPVi1mKsQ5trBcjwtK3PoLqcQcbCPjkGlnEKsm5ZDW53lHKFxbCG6YriOQbWU03K8KkDb+0dCdNEOOgbVNXNuA1RrGluIrtpex6A6yylEXPSxHG9g1IPqKufWmsYVorsinz2lnEKsn8jPO6WcQqyfLaMeUMopxPrpv+5P6Vm6yjlA07hCdFdiylmnaVwhuisx5eyjaVwhuivym9+lnEKsn8qfOS3HU0DvqMcVYgPVRj2gjpmzFnmOU8RP5Gtd6SinFFPEUeTlzEQ9INCuYcxE2E9Nn/5Q9bWb6c5RiYqohnDrzuhIOSvIxZknF6ZUII/ilUGKoCH6MSPmu3YB0L/vYAU6IPXhdrozVLDIJxVdb6XI
7NnD9lQzP65SBbktsnwSU842TeNWrLGZcV/pzlDhElPOaM+sE+Dg1GSZNctrRdQD6irnAk3jVqShyp9do/JyvlleX0Q9oK5yLtQ0bkUamxkX+Q9OAs2JekCZOSvAEalJg3VnSIDPox5QZs6Y217Nm7ORahuiO0cCJKaccmWxh1yYHveZ7gwJkZjDWvmB6iFHpd+OfMnGhErMzDlT07gVZSu1cF6dapHb9cpvGdlcY9SD6irnbKCgaeyKMSb91CzdGRIi8lkTNJXTd+02woKKDXBs+g0t+0YmUOTnm6B3l7FpGseOvc1YuqAvK3fTnSMhtByh6CznVI1jx965madnKCUPrkfkdR2D6iznPzWOHXsnpl+VRdKi86qOQXWW803kuc5u6U9uSX9W7K47R0J8TDa3SMfA2srpu3YD8KGu8ePs7MyzHykV/Zo2CaVl1gS9MydoOpaPu1PTL/XSnSFBXtE1sJQzZvrSmBtIw3DdORIksTOntt9KcXV6euI0pajSnSMhPieb0/IeJ2gup+/ac5Dzzi75QWaijhUTk0rbrAn6Z06AcboDxEUtzY1bsFgOaaOj9cjOhHI+qTtAXJyafmmKUsjFoOg8r3NwE8r5T+Th6/VyRvo5uSMoOm+QzX2qM4D2cvquXQQ83TlM14vWZkstkBsPonOf7gDay1nymO4Apjsh/doUpaLfhi6hWoBHdIcwpZzPAV/qDmGys9PP5nVnSJBxZHMNukMYUc7S/inaDyNMVUW+bYiau6vuHAlixM+iEeUsuVt3AFMdnXpzslLU686REAuBZ3SHAIPK6bv2TOR2vjU6LzO+RXeGBPkr2ZwRpxDGlLPkL7oDmCZNIT9UzRmmO0eC3Ks7wCqmlfOvyHue/+KI1LtTUirorztHQkwlm/tAd4hVjCqn79otwC26c5hkTMZr0p0hQX6vO0BHRpWz5FZAfiABRbE4XM3eWXeOhPgcQ67SrmJcOX3XXgr8WXcOE4xOTZ6aVsGmunMkxG/J5ozacd3Ux49uBC7G3HyRGJN+qiHK8VryAaPubqK1APkinLRLhl8e0oulzQGn/m0lfkOA1U/x6Em92Xijzrf5PjMrzw+faaFQDDh/z2qcg2oA+OmEFsbPyrPHZmnuO2EjAO6f3MbS5oAfjqyJ8ltcm/kYOCEYN3PC1895GnWIEb0g2Cc1Y8coR6xJwwtn1TJ5bB0fXFjLM7PzvDU3j/taK4dtm+GTS+s4bNsM7mutnV5bKAZc8nQz40/rzfRL6nhoWjvTFxXItQS8MbfAlIvqKAQBUxcUaG4PuGdyOxfvUx3lt/dtbiCbM+7tKiPLWXI10Kw7hC4jU9OnZ1Qx0n03lVLUVYczYnsR2guggCdm5DlreLj4wlnDq3h8Rue3Ad+eV2CH/im22zhFdVrx/WFVPPFxnpSCtkJAEAQ0t0NVGm54o43L9q2mKm3EQzZfALfpDrEmxpbTd+15wE26c+gyNj1usY5xC8WAPe5oZOANKzhiuwz7bZlhQWORzfuEPyqb90mxsKnY6XXzVgRs1febH6ct+yrmrSjSp0Zx4i5VjPhjE9v2S1Ffo3jnywLH7WzMSiu/IJvrfChgANPP6VzgAmAT3UGidmDqw211jJtOKT4YW0dDS8AJj6xk2sL1228qWMMKxKvmxSsPrOHKA8Nzy/OfbOaa0TX86b02npudZ/dBaX42Stt553QMPn0yduYE8F17OfAr3TmiNkJ9MqNKFbbWmaFfL8XobTI8MyvPoLoU81eEs+X8FUUG1nb+sdmyr+KL5d/MqHOXBwzu86+f9/78sOhDBqS4b3I7j57cm2kLC3yyRNuGc1eRzRm7253R5Sy5HfhYd4gojc2Mm69j3EVNRRpawimwuT3g+c/y7LxJimOHZLh3cvguw72T2zlup84HXPtskeaTJUU+W1akrRDw8IftHLva5/38xVauOaSG9iIUSjNtSsFKPW9gPEc294SWkdeT8eX0XbsdGEOCtm4YnZq8lY5x
5zcGHHJvE7vf3sg+dzVxxHYZjh5ShXNQNRM+zbPjzY1M+DT/9VskX64octSDKwHIpBS3HNWLIx9YyS63NnLK0CqGDfxmUfrHP25nn8FpBvdJ0a+XYv8t0+x2eyNKwfDNIl+8fjlwftSDdpUK1nSyYCDL8e4ALtSdo9yGKn/20zVXba87R4U7n2zOuPc1V2f8zNnBlSRgtYSxmXFf6M5Q4cbHoZgQo3KWLg5drDtHuR2emrS57gwVLEd49T8WYlNOAN+1n8CAhZfKZTv15Zzeqm0n3Tkq2I/I5ubpDrG+YlXOkguBz3SHKIcL0+Mq8vsyxNNkc7FaCid25fRdOwecChj1BEFPOCr9tjyBUh4NhFf8YyV25QTwXfsdwNGdoydtpRbO66OaZTmS8rg0Toezq8SynAC+a99IBW2CdEHam6U7Q4W6gWzuAd0huiO25Sw5G5itO0RPODb9xsa6M1Sg/wN+qjtEd8W6nKVVE44lvOMjtgaxdGE9TbJodM96FzidbC4ed9msQazLCeC79nTCC0TG3sC8Ludmxn+sVPz/vzDI58AxZHMrdQfZEBXxA+G79jPE+AaFE9Ov9tGdoYIsB44mm/tKd5ANVRHlBPBd+07gOt05uqo/uSUDWC5b+/WMAnAq2dxU3UF6QsWUE8B37auI2bq3Z2Wem64UkT+WUaEuI5szYp+TnlBR5Sy5DPij7hDr69T0SxvpzlAhfkM2Z+RaQN1VceX0XTsALsLApQ5X14em3CCWySHthruSbO5/dIfoaRVXTvi6oGMweH0YgNPSE6cphTHrQ8ZQATiPbO4G3UHKoSLLCeC7dhE4h3CZEyOdnnnemCXoYqgVOJlsrmJ3povNSggbwnK8/wZ+oztHR7U0N06rOS+jFL10Z4mhRuB4srmJuoOUU8XOnB35rn0dcBYGPclySvqlqVLMblkMHFrpxYSElBPAd+37ABtYoTsLwBnpCbojxNFcYBTZ3Du6g0QhMeUE8F17AnAAMFNnjhraWrZVX+2mM0MMTQIOJJv7SHeQqCSqnAC+a08D9gb+rivD99KvTlGKOl3jx9BthMX8XHeQKCXigtDaWI73Y+B6It6W4tnqK1/fKTX3wCjHjKkVwAVkcxW7btS3SdzM2VHpge1DgMiekq8i3zZEzZXHw9btPWDvpBYTEl5OAN+1XwN2Be6JYjw79dYUpaiPYqyYKgDXAiPJ5rReG9At0Ye1q7MczwbuBMq2L+a46qte3S3lf6dcXz/mZgNnks29oTuICRI/c3bku7YHDAPuLcfXT1EsDFNzhpbja8dcO+FerHusq5hKqQFKqQ9KH18ppeZ1+PM6b4VUSo1WSh3QQ7nLSmbOtbAc7xDCH5geuzH9u6m337+j+qYRPfX1KkS4zk8290lXX6iUygKNQRD8rpyv0UVmzrXwXftFYAThItaLeuJrXpDxjLgBwhDvAgeTzX2vO8XsSCm1l1LqZaXUJKXUs0qpzUt/f5lSarpSaopS6mGllAWMBX5UmmmNPr2QmXM9WI5XD/yM8FnRbj1FoigWZ9WcsTitgoE9Gi5+vgCuAh7c0MW3SrNgE3ACcFwQBIuUUqcCRwZBcK5S6ktg2yAIWpVS/YIgaIjTzGn6tvNGKK0yf4XleDcTLrV4HtClvdIPTk2ZllZBkp/dbARc4EayueYe/Lo1hFfbJyilANLAqs2HpwAPKqUeBx7vwTEjIeXsAt+1PwcusRzv18AVhIe8vdfntWPSTy0rZzaDLQX+AvyObG5BGb6+Aj4MgmD/Nfw3GxhFuHzqz5VSsVpRX845u8F37fm+a/8YsAgXFVvHOWkQ7Jv6eMfyJzPKO4TP025BNndFmYoJ4XOdmyql9gdQSlUppYYppVLAVkEQvEi4t2s/oI7wrqNYrHYo55w9wHK8auB7hDPp6NX/+8jUhx8+XH1trH5rd1Mz8DBwG9ncu+UebNX5I/A88AegnvBo8CbCm0peLP2dAh4IgsBVSg0B/gYUgUuDIHi1
3Dm7S8rZwyzH24lwiZQzgU0A7q66/qVD0pNHa4xVbp8AdwB3k80l9fC9x0k5y8RyvAxwGHDKRzVn7bWRah+uO1MPmwmMJ9xM6oU4b3tgKilnFLL1aeBA4BjCixND9AbqlkbgBeAZ4BmyOdnot8yknDpk64cABwN7lT52o4tvzURkKqvKCK+RzbVpzpMoUk4TZOurCN+r25NvCrs7RLbGUCvwEeH7glNL//yAbG5hROOLNZBymipbnwGGAjsAmxJeXOr40fHvaju8MiC8arpyLR854DPCJ0BmA7OAL8jmimX/nkSXSDkrQba+F+HNEM09fPeN0EjKKYSh5A4hIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQwl5RTCUFJOIQz1/4L40Sovp94/AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.pie(countDF['count'], labels = countDF['Name'],autopct = '%.1f%%')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "b1fda7af",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "neigh = KNeighborsClassifier(n_neighbors= 2 )\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)\n",
    "y_true=y_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "4a52d88f",
   "metadata": {},
   "outputs": [],
   "source": [
    "#confusion matrix\n",
    "from sklearn.metrics import confusion_matrix\n",
    "cm= confusion_matrix(y_true, y_testing_prediction)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "f56bd3c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUkAAAE+CAYAAAAAreLRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAa30lEQVR4nO3de7xd07338c83O/cQgkgjUcWJqssplUQcVFq3uDziQdqoSzhp0+Og2moV1Tr0xHH0coo+UnGNa5oWR5RQdl3qCEmKNhJCUOzIEeJSBNmX3/PHmtJl22vsS+faa++1v2+v+VpzjXkb67Vf+fmNOcYcUxGBmZm1rFelK2Bm1pU5SJqZJThImpklOEiamSU4SJqZJThImpkl9K50BZIkj08yq4QIdeSw+teea/e/2T6bbNWha3WWrh0kgfpXn610FawD+gzdmt59Nqt0NayDGjp6YFNjntXoErp8kDSzbiSaKl2D3DlImll+mhwkzcxKCmeSZmYJziTNzBKqMJP0OEkzswRnkmaWHw8BMjNLqMLmtoOkmeXHHTdmZqV5CJCZWYozSTOzBGeSZmYJVdi77XGSZpafaGr/0gaS/iJpsaTHJS3KyjaSdLekZ7LPIUX7nyFpuaRlkvYvKt8lO89ySRdJanWaNgdJM8tPU1P7l7b7QkTsFBGjs++nA7URMQqozb4jaTtgMrA9MAG4RFJNdswMYBowKlsmtHZRB0kzy0+ZMskSJgKzsvVZwKFF5bMj4oOIeB5YDoyVNBwYHBHzIyKAa4qOKclB0szyU75MMoDfSfqjpGlZ2bCIWAmQfW6alY8AXio6ti4rG5GtNy9PcseNmeUmov0dN1nQm1ZUNDMiZjbbbfeIeFnSpsDdkp5KnbKlqiXKkxwkzSw/HWg+ZwGxeVBsvs/L2ecqSbcAY4FXJA2PiJVZU3pVtnsdsHnR4SOBl7PykS2UJ7m5bWb5KUNzW9IgSet/uA7sBzwBzAWmZLtNAW7N1ucCkyX1k7QlhQ6aBVmT/G1J47Je7WOLjinJmaSZ5ac8g8mHAbdko3V6AzdExJ2SFgJzJE0FXgQmAUTEEklzgKUU3ml2YvztPsAJwNXAAGBetiSp0MnTRUnhtyV2T35bYvfWsHZFh17z+v7Cm9odUPqPObxLv1LWzW0zswQ3t80sP35228wswbMAmZklOJM0M0twJmlmluAgaWZWWkceS+zqHCTNLD/OJM3MEtxxY2aW4EzSzCzBmaSZWYIzSTOzBGeSZmYJziTNzBIcJM3MEtzcNjNLqMJM0pPumpklOJM0s/y4uW1mllCFzW0HSTPLjzNJM7MEZ5JmZgkOkmZmCdHu1253eQ6SZpYfZ5JmZgkOkmZmCe7dNjNLcCZpZpbgjhszswRnkmZmCQ6SZmYJ7rgxMystmnxP0sysNDe3zcwSqrC57ZnJzcwSnEmaWX58T9LMLMH3JM3MEhwkLWW/w6cwaOBAevXqRU1NDXOuvIiLZ17D7x+cTy/1YqMhGzD9+6ey6dCNAVi2/HnOveAi3nl3Db169WL25RfSr19fjjvpNF577XX69esHwMyfT2fjIRtW8JdZseVPP8zb77xDY2MTDQ0NjNvtwEpXqevwY4nWmisvPp8hG26w7vvxRx3OydOOBeC6X9/KjKtu4OzTTqahoZHTz72A//jBd9l21Fa8+dZf6d27Zt1x5599Gjt8ZptOr7+1zT77TmL16jcqXY2upwozSfdul9l6gwatW3/vvfeRCusPLfgj22y9JduO2gqADTcYTE1NTUunMOs+mqL9SxtJqpH0mKTfZt83knS3pGeyzyFF+54habmkZZL2LyrfRdLibNtF0of/IksrWyYpaVtgIjACCOBlYG5EPFmua1aaJKZ96/tIYtLEA5g0sdAMu/DSq5l7Zy3rDxrElRefD8ALL61Yt/8bb77FAfvsxT8fNWnduX5w3n/Rq1cv9h2/O18/7kja8Le0ThIRzLvj
RiKCyy67jsuvuL7SVeo6yjtO8hTgSWBw9v10oDYizpd0evb9e5K2AyYD2wObAfdI2iYiGoEZwDTgYeAOYAIwL3XRsmSSkr4HzAYELAAWZus3Zj+mKl0746f8+qpfMOOnP+LGm3/LoscXA3DK14+j9pZrOWi/L3DDTbcB0NDYyGN/XsJ/nn0a18z4CbX3P8TDix4D4D/PPo1brp3BNZf8mD/+6Qnm3llbsd9kH/f58YcydtcJHPx/juaEE45jzz12rXSVuo4yZZKSRgIHAZcXFU8EZmXrs4BDi8pnR8QHEfE8sBwYK2k4MDgi5kdEANcUHVNSuZrbU4ExEXF+RFyXLecDY7NtJUmaJmmRpEUzy1S5cvmwQ2bjIRuy9+f/icVLl31k+0H7jeee+/4HgGGbbsLonXZkyIYbMKB/f/bcbQxLlz1b2DZ0EwAGDRrIQft+gSeWPt2Jv8Jas3LlKwC8+upqbr11HmPG7FTZCnUh0dTU7qWNfg6cBhQfMCwiVgJkn5tm5SOAl4r2q8vKRmTrzcuTyhUkmyikuc0N56M/8mMiYmZEjI6I0dPKUrXyWPPe+7z77pp16w8teJRRW32KF15asW6fe//wMFtuMRKA3cfuwtPPPs97779PQ0Mjix5fzNZbfpKGhkbeePMtAOobGrj/oUf4h6226PwfZC0aOHAA6603aN36vvvsxZIly1o5qgfpQCZZnBhly0f+6Us6GFgVEX9sYy1aujcVifKkct2T/CZQK+kZ/hbRPwn8A3BSma5ZUatff4NTzvwRAI0NjRy433j2GDeab5757/zlxTrUS2z2iU354XdPBmCDwetz7OTDmDz1FCSx525j2OufxrLmvff5+rfPor6hgabGJsaN2ZkjDplQyZ9mRYYNG8pvfn0FAL171zB79n9z1+/uq2ylupIO3JOMiJlAquG4O3CIpAOB/sBgSdcBr0gaHhErs6b0qmz/OmDzouNHUugTqcvWm5cnKco0rklSLwrN6xEUIngdsDC7edrWk0T9q8+WpX5WXn2Gbk3vPi01Jqw7aFi7okM9he+ee1S7A8qgH17f5mtJGg98JyIOlvRjYHVRx81GEXGapO2BGyjEn82AWmBURDRKWgicDDxCoePm4oi4I3XNsvVuR0QThR4kM+spOnec5PnAHElTgReBSQARsUTSHGAp0ACcWJScnQBcDQyg0Kud7NkGDyY3szyVeYKLiLgPuC9bXw3sXWK/6cD0FsoXATu055oOkmaWnyqcT9JB0szyU4VTpfmxRDOzBGeSZpabdgwO7zYcJM0sP1XY3HaQNLP8OEiamSW4d9vMLMGZpJlZaeEgaWaW4CBpZpbgIUBmZgnOJM3MEhwkzcxKK9f8tJXkIGlm+XEmaWaW4CBpZlaax0mamaU4SJqZJVTfMElPumtmluJM0sxy43uSZmYpDpJmZglVeE/SQdLMcuPmtplZijNJM7PSnEmamaU4kzQzK60K3wPmIGlmOXKQNDMrzZmkmVmKg6SZWWnOJM3MEhwkzcwSHCTNzFJCla5B7hwkzSw31ZhJetJdM7MEZ5JmlptocnPbzKykamxuO0iaWW7CHTdmZqU5kzQzS6jGe5Lu3Taz3ES0f2mNpP6SFkj6k6Qlks7JyjeSdLekZ7LPIUXHnCFpuaRlkvYvKt9F0uJs20WSWo3qDpJmlptoUruXNvgA+GJEfBbYCZggaRxwOlAbEaOA2uw7krYDJgPbAxOASyTVZOeaAUwDRmXLhNYu7iBpZrkpR5CMgneyr32yJYCJwKysfBZwaLY+EZgdER9ExPPAcmCspOHA4IiYHxEBXFN0TEkOkmaWm3I0twEk1Uh6HFgF3B0RjwDDImJl4bqxEtg0230E8FLR4XVZ2YhsvXl5koOkmeWmI5mkpGmSFhUt0z523ojGiNgJGEkhK9whUY2W0tNIlCe5d9vMctORcZIRMROY2cZ935R0H4V7ia9IGh4RK7Om9Kpstzpg86LDRgIvZ+UjWyhP
ciZpZrmJpvYvrZE0VNKG2foAYB/gKWAuMCXbbQpwa7Y+F5gsqZ+kLSl00CzImuRvSxqX9WofW3RMSa1mkpKGAecBm0XEAVnP0W4RcUXrP8/MepKm8jxxMxyYlfVQ9wLmRMRvJc0H5kiaCrwITAKIiCWS5gBLgQbgxIhozM51AnA1MACYly1JilbunEqaB1wFfD8iPiupN/BYROzY7p/aXlLUv/ps2S9j+eszdGt699ms0tWwDmpYu6JD0W7Ztge0sSvmbz791LwuPQK9Lc3tTSJiDtkrfiKiAWhMH2JmPVGZxklWVFs6bt6VtDFZL1A2iPOtstbKzLqltg7p6U7aEiS/TeFG6NaS/gcYChxR1lqZWbfUHTLD9mo1SEbEo5L2Aj5NYZzRsoioL3vNzMy6gLb0bh/brOhzkoiIa8pUJzPrpsrUu11RbWlujyla7w/sDTxK4blHM7N1euSkuxFxcvF3SRsA15atRmbWbfXUjpvm1lAYwW5m9hE9srkt6Tb+9hB4L2A7YE45K2Vm3VOPbG4DPylabwBeiIi6UjubWc/V45rb2bOSP4iIfTqpPmbWjfW45nZENEpaI2mDiKjIUzZ9hm5dictaDhrqW52FyqpMT21uvw8slnQ38O6HhRHxjbLVqkjfvq1OHGxd0Nq1K9h26OhKV8M66KkOHtfjMsnM7dlSrArvPJjZ36saA0NbguSGEXFhcYGkU8pUHzPrxqoxk2zLVGlTWig7Lud6mFkViFC7l66uZCYp6UjgK8CWkuYWbVofWF3uiplZ99OGtzF0O6nm9kPASmAT4KdF5W8Dfy5npcyse4oWX0jYvZUMkhHxAvACsFvqBJLmR0RyHzPrGZqqsOcmj1fK9s/hHGZWBZqqMJPM45WyVfj/DjOzgjwySTMzoDrvSbaaSUo6SdKQ1C451sfMurGmDixdXVua258AFkqaI2mCpOZB8Zgy1MvMuqFA7V66ulaDZEScRWGS3SsoDCJ/RtJ5krbOtj9R1hqaWbfRUzNJIiKA/82WBmAI8BtJF5SxbmbWzVRjkGzLzOTfoPBo4mvA5cB3I6JeUi/gGeC08lbRzLqL7tB8bq+29G5vAhyWDS5fJyKaJB1cnmqZWXfUVH0xsk1vS/xhYtuT+VbHzLqzahxM7nGSZpabanyyxEHSzHLTHTpi2stB0sxy0/SxYdTdn4OkmeXGzW0zswQ3t83MEnrkECAzs7byECAzs4RqvCeZx6S7ZmZVy5mkmeXG9yTNzBLcu21mllCN9yQdJM0sN9XY3HbHjZnlphyT7kraXNK9kp6UtETSKVn5RpLulvRM9jmk6JgzJC2XtEzS/kXlu0hanG27qIXX0XyMg6SZ5aZMM5M3AKdGxGeAccCJkrYDTgdqI2IUUJt9J9s2GdgemABcIqkmO9cMYBqFV9KMyrYnOUiaWW5C7V9aPWfEyoh4NFt/G3gSGAFMBGZlu80CDs3WJwKzI+KDiHgeWA6MlTQcGBwR87NX0lxTdExJvidpZrkpd++2pE8BOwOPAMMiYiUUAqmkTbPdRgAPFx1Wl5XVZ+vNy5OcSZpZbjrS3JY0TdKiomVaS+eWtB5wE/DNiPhrohot5aeRKE9yJmlmuenIEKCImAnMTO0jqQ+FAHl9RNycFb8iaXiWRQ4HVmXldcDmRYePBF7Oyke2UJ7kTNLMctOk9i+tyXqgrwCejIifFW2aS+FNrmSftxaVT5bUT9KWFDpoFmRN87cljcvOeWzRMSU5kzSz3JTpnuTuwDHAYkmPZ2VnAucDcyRNBV4EJgFExBJJc4ClFHrGT4yIxuy4E4CrgQHAvGxJcpA0s9yUI0hGxIO0fD8RYO8Sx0wHprdQvgjYoT3Xd5A0s9z4sUQzs4RqfCzRQdLMclONswC5d9vMLMGZpJnlxvckzcwSmqowTDpImlluqvGepIOkmeWm+vJIB0kzy5EzSTOzBI+TNDNLcMeNmVlC9YVIB0kzy5HvSZqZJbi5bWaWUH0h0kHS
zHLk5raZWYKb22ZmCdUXIh0kzSxHbm6bmSVEFeaSnnTXzCzBmaSZ5cbNbTOzBPduW4dtsMFgLv3lj9l++08TEXxt2qkceuiBHHzQPqxdW89zz73AV7/2bd5666+VrqoBtYtu5d131tDY1ERjQwNH7DcFgKOnfomjpn6JhoZG7r/nQX5y7sX07l3Dv//XWWy347bU9K7h1jl3MPOiqyv7Ayqk+kKkg2Sn+dlPz+Gu393H5CO/Tp8+fRg4cAC1tQ9w1ln/QWNjI+dNP5PvnXYSZ37/vEpX1TLHHvYvvPn6W+u+77r7LnzxgL04ZPyR1K+tZ6NNhgAw4ZB96NO3L4eMP5L+A/px+x/mcPstd7HipZWVqnrFVGMm6Y6bTrD++uuxx567ctVVNwJQX1/PW2/9lXvueYDGxkYAHnnkUUaMGF7JalorJh93OJddNIv6tfUAvP7aGwBEBAMHDqCmpob+/ftTX1/PO2+/W8mqVkxTB5auzkGyE2y15Sd57dXXufyyn7HgkTv55YwfM3DggI/sc9xxX+auu+6tUA2tuYjgijm/4Ka7r+FLx/xfAD619RaMHrcTv5p3Fdf+96XssNN2ANx1Wy1r1rzHHxbP4/eP3saVl1zPW2/2zNsm0YH/urqKBElJx1fiupVS07s3O++8A5fOvJaxu07g3TVrOO27J67bfvr3TqahoZEbbry5grW0Yl85+Kscvs8xfO3IU/jKPx/B6HE7U1NTw+AN1ufLBxzPBedcyM8vK9wa2fFz29PU1MTn//EA9hkzkeNPOIqRW4yo8C+oDGeS+Tmn1AZJ0yQtkrRoZmfWqIxWrFhJXd1KFi58DICbb76dnXbeEYBjjj6CAw/ch2OnnFTJKlozq155DSg0qe+54z7+8XPb88rKVdx9eyHbX/zYUpoiGLLxhhx82AT+8PuHaGho5PXX3uDRBX9ih89+ppLVrxhnku0g6c8llsXAsFLHRcTMiBgdEaOnlatyneyVV16lru5lttlmKwC++IU9ePLJZ9hvv/F85zv/ymGHH897771f4VrahwYM7M+gQQPXre8+fhxPP/ks98y7j133HAPAp7b6JH369OGN1W+ycsX/Mm6PMev2/+wuO/Dc8r9UqvoVVY2ZZDl7t4cB+wNvNCsX8FAZr9slfetbP2DW1RfTt29fnn/+Bb76tVN56KHb6de3L/PuKHToPLLgUU466YwK19Q2Hroxv7j6AgBqanrz25vv5MF759OnT2+mX/hD5t4/m/r6ek4/+d8AuOHKX3PehT/ktgd+hQQ3z76Np5cur+AvqJym6PqZYXspyvSjJF0BXBURD7aw7YaI+EobThJ9+/bMezvd3dq1K9h26OhKV8M66KlVCzv03sOjtzis3QHluhdu7tLvWCxbJhkRUxPbWg+QZtbtVOM4SQ8mN7PcdIeOmPZykDSz3HSHjpj2cpA0s9y4uW1mluDmtplZQjU2t/3stplZgjNJM8tNucZdV5KDpJnlxh03ZmYJvidpZpZQjlmAJF0paZWkJ4rKNpJ0t6Rnss8hRdvOkLRc0jJJ+xeV7yJpcbbtIkltehzSQdLMctNEtHtpg6uBCc3KTgdqI2IUUJt9R9J2wGRg++yYSyTVZMfMAKYBo7Kl+Tlb5CBpZrmJiHYvbTjnA8DrzYonArOy9VnAoUXlsyPig4h4HlgOjJU0HBgcEfOjcNFrio5JcpA0s9x0ZD7J4om2s6UtU8kOi4iVANnnpln5COClov3qsrIR2Xrz8la548bMctORJ24iYiaQ14sIWrrPGInyVjlImlluOnEI0CuShkfEyqwpvSorrwM2L9pvJPByVj6yhfJWubltZrkpxz3JEuYCU7L1KcCtReWTJfWTtCWFDpoFWZP8bUnjsl7tY4uOSXImaWa5KUcmKelGYDywiaQ64GzgfGCOpKnAi8AkgIhYImkOsBRoAE6MiMbsVCdQ6CkfAMzLllY5SJpZbsoxC1BEHFli094l9p8OTG+hfBGwQ3uv
7yBpZrmpxheBOUiaWW6qL0Q6SJpZjjzBhZlZQjUGSQ8BMjNLcCZpZrnxpLtmZgnV2Nx2kDSz3PhtiWZmCW5um5kluLltZpbgTNLMLMGZpJlZgjtuzMwSPMGFmVmCM0kzswRnkmZmCc4kzcwSnEmamSU4kzQzS3AmaWaWUI2ZpCfdNTNLcCZpZrmJaKp0FXLnIGlmufGz22ZmCZ4FyMwswZmkmVmCM0kzswSPkzQzS6jGcZIOkmaWGze3zcwS3HFjZpbgTNLMLMEdN2ZmCc4kzcwSfE/SzCzBmaSZWYLvSZqZJVTjYHJPumtmluBM0sxy4+a2mVmCO27MzBKq8Z6kg6SZ5caZpJlZgoOkmVlC9YVIUDVG/u5A0rSImFnpeljH+O/Xc3icZOVMq3QF7O/iv18P4SBpZpbgIGlmluAgWTm+n9W9+e/XQ7jjxswswZmkmVmCg2QFSJogaZmk5ZJOr3R9rO0kXSlplaQnKl0X6xwOkp1MUg3w/4ADgO2AIyVtV9laWTtcDUyodCWs8zhIdr6xwPKIeC4i1gKzgYkVrpO1UUQ8ALxe6XpY53GQ7HwjgJeKvtdlZWbWBTlIdj61UOYhBmZdlINk56sDNi/6PhJ4uUJ1MbNWOEh2voXAKElbSuoLTAbmVrhOZlaCg2Qni4gG4CTgLuBJYE5ELKlsraytJN0IzAc+LalO0tRK18nKy0/cmJklOJM0M0twkDQzS3CQNDNLcJA0M0twkDQzS3CQNDNLcJC0LkXScZJ+Uel6mH3IQdI6RTZFnFm34yBpLZL0I0mnFH2fLukbLew3XtIDkm6RtFTSLyX1yra9I+lcSY8Au0k6WtICSY9LuvTDwCnpeElPS7of2L2zfqNZWzhIWilXAFMAsqA3Gbi+xL5jgVOBHYGtgcOy8kHAExGxK7Aa+DKwe0TsBDQCR0kaDpxDITjuS2EiYrMuo3elK2BdU0T8RdJqSTsDw4DHImJ1id0XRMRzsO7Z5j2A31AIhDdl++wN7AIslAQwAFgF7ArcFxGvZsf/CtimPL/KrP0cJC3lcuA44BPAlYn9mk8A8OH39yOiMVsXMCsizijeUdKhLRxv1mW4uW0pt1B4n8sYCrMWlTI2m/qtF4Um9YMt7FMLHCFpUwBJG0naAngEGC9pY0l9gEm5/gKzv5MzSSspItZKuhd4sygjbMl84HwK9yQfoBBcm59rqaSzgN9lwbQeODEiHpb0b9k5VgKPAu4Jty7DU6VZSVkwexSYFBHPlNhnPPCdiDi4E6tm1mnc3LYWZa+5XQ7UlgqQZj2BM0lrE0k7Atc2K/4gG95jVrUcJM3MEtzcNjNLcJA0M0twkDQzS3CQNDNLcJA0M0v4/zAWhn5+HZgnAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 360x360 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "f, ax =plt.subplots(figsize = (5,5))\n",
    "sns.heatmap(cm,annot = True, linewidths= 0.5, linecolor=\"red\", fmt=\".0f\", ax=ax)\n",
    "plt.xlabel(\"y_pred\")\n",
    "plt.ylabel(\"y_true\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "c243db41",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "         0.0       0.99      1.00      0.99      5370\n",
      "         1.0       0.99      0.90      0.94       630\n",
      "\n",
      "    accuracy                           0.99      6000\n",
      "   macro avg       0.99      0.95      0.97      6000\n",
      "weighted avg       0.99      0.99      0.99      6000\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Print out classification report and confusion matrix\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "print(classification_report(y_test, y_testing_prediction))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "ca9b5c24",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9896666666666667"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "neigh = KNeighborsClassifier(n_neighbors= 3 )\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "64e98e1e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9856666666666667"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "neigh = KNeighborsClassifier(n_neighbors= 10)\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "50603bdb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9768333333333333"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "neigh = KNeighborsClassifier(n_neighbors= 20)\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "64772fe8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9625"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "neigh = KNeighborsClassifier(n_neighbors= 40)\n",
    "neigh.fit(X_train, y_train)\n",
    "y_testing_prediction = neigh.predict(X_test)\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test, y_testing_prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "bb1cb83c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# do another round of modeling with all of the datasets together, just to see the accuracy's improvement\n",
    "#visualization would be nice as well\n",
    "#Every organization runs a project. Problem there are always risks with these projects\n",
    "# These issues with project management, can cost project managers  $62 million due to human error\n",
    "# I am building a machine learning algorithm, that will detect the risks in projects\n",
    "\n",
    "#on the other side of the poster:\n",
    "# We created project examples in JIRA\n",
    "# We can tell a story about how we cleaned up the data in python\n",
    "# final output can be a spread sheet to mark which project tasks are at risk\n",
    "#open up excel\n",
    "# the project sample, has to show which tasks were at risk\n",
    "original_df.to_csv('./Storing_csv_files_2/{}.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "d923ea38",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K =  2 98.8\n",
      "K =  3 98.88333333333334\n",
      "K =  4 98.96666666666667\n",
      "K =  5 98.8\n",
      "K =  6 99.01666666666667\n",
      "K =  7 98.85000000000001\n",
      "K =  8 98.91666666666666\n",
      "K =  9 98.68333333333334\n",
      "K =  10 98.91666666666666\n",
      "K =  11 98.56666666666666\n",
      "K =  12 98.7\n",
      "K =  13 98.46666666666667\n",
      "K =  14 98.58333333333333\n",
      "K =  15 98.38333333333334\n",
      "K =  16 98.53333333333333\n",
      "K =  17 98.08333333333333\n",
      "K =  18 98.2\n",
      "K =  19 97.96666666666667\n",
      "K =  20 98.01666666666667\n",
      "K =  21 97.68333333333334\n",
      "K =  22 97.75\n",
      "K =  23 97.46666666666667\n",
      "K =  24 97.51666666666667\n",
      "K =  25 97.33333333333334\n",
      "K =  26 97.38333333333333\n",
      "K =  27 97.23333333333333\n",
      "K =  28 97.25\n",
      "K =  29 97.11666666666666\n",
      "K =  30 97.18333333333334\n",
      "K =  31 96.96666666666667\n",
      "K =  32 97.03333333333333\n",
      "K =  33 96.86666666666667\n",
      "K =  34 96.93333333333334\n",
      "K =  35 96.8\n",
      "K =  36 96.8\n",
      "K =  37 96.6\n",
      "K =  38 96.68333333333334\n",
      "K =  39 96.43333333333334\n",
      "K =  40 96.51666666666667\n",
      "K =  41 96.25\n",
      "K =  42 96.35000000000001\n",
      "K =  43 96.16666666666667\n",
      "K =  44 96.3\n",
      "K =  45 96.11666666666666\n",
      "K =  46 96.18333333333334\n",
      "K =  47 96.01666666666667\n",
      "K =  48 96.03333333333333\n",
      "K =  49 95.73333333333333\n",
      "K =  50 95.81666666666668\n",
      "K =  51 95.65\n",
      "K =  52 95.68333333333334\n",
      "K =  53 95.56666666666666\n",
      "K =  54 95.63333333333334\n",
      "K =  55 95.46666666666667\n",
      "K =  56 95.58333333333333\n",
      "K =  57 95.43333333333334\n",
      "K =  58 95.48333333333333\n",
      "K =  59 95.33333333333334\n",
      "K =  60 95.41666666666667\n",
      "K =  61 95.23333333333333\n",
      "K =  62 95.31666666666668\n",
      "K =  63 95.18333333333334\n",
      "K =  64 95.21666666666667\n",
      "K =  65 95.05\n",
      "K =  66 95.11666666666667\n",
      "K =  67 94.98333333333333\n",
      "K =  68 95.03333333333333\n",
      "K =  69 94.85\n",
      "K =  70 94.89999999999999\n",
      "K =  71 94.68333333333334\n",
      "K =  72 94.73333333333333\n",
      "K =  73 94.68333333333334\n",
      "K =  74 94.69999999999999\n",
      "K =  75 94.6\n",
      "K =  76 94.6\n",
      "K =  77 94.56666666666666\n",
      "K =  78 94.55\n",
      "K =  79 94.45\n",
      "K =  80 94.45\n",
      "K =  81 94.33333333333334\n",
      "K =  82 94.35\n",
      "K =  83 94.21666666666667\n",
      "K =  84 94.25\n",
      "K =  85 94.18333333333334\n",
      "K =  86 94.18333333333334\n",
      "K =  87 94.11666666666667\n",
      "K =  88 94.1\n",
      "K =  89 94.03333333333333\n",
      "K =  90 94.06666666666666\n",
      "K =  91 94.0\n",
      "K =  92 94.01666666666667\n",
      "K =  93 93.85\n",
      "K =  94 93.85\n",
      "K =  95 93.81666666666668\n",
      "K =  96 93.86666666666666\n",
      "K =  97 93.7\n",
      "K =  98 93.73333333333333\n",
      "K =  99 93.7\n",
      "K =  100 93.73333333333333\n",
      "95.08100000000006\n"
     ]
    }
   ],
   "source": [
    "sum = 0\n",
    "\n",
    "for x in range (1, 100):\n",
    "    neigh = KNeighborsClassifier(n_neighbors= x)\n",
    "    neigh.fit(X_train, y_train)\n",
    "    y_testing_prediction = neigh.predict(X_test)\n",
    "    x = x + 1\n",
    "    from sklearn.metrics import accuracy_score\n",
    "    sum = sum + accuracy_score(y_test, y_testing_prediction)*100\n",
    "    print(\"K = \",x,accuracy_score(y_test, y_testing_prediction)*100)\n",
    "    avg = sum/x\n",
    "print(avg)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "500283d6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
